/**
* Regular expression to match HTML/XML attribute pairs within a tag.
- * Allows some... latitude.
+ * Allows some... latitude. Based on the HTML5 tokenizer's attribute value states:
+ * http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
* @return string
*/
if ( self::$attribsRegex === null ) {
$attribFirst = '[:A-Z_a-z0-9]';
$attrib = '[:A-Z_a-z-.0-9]';
- $space = '[\x09\x0a\x0d\x20]';
+ $space = '[\x09\x0a\x0c\x0d\x20]';
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
- \"([^<\"]*)(?:\"|\$)
- | '([^<']*)(?:'|\$)
- | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
+ \"([^\"]*)(?:\"|\$)
+ | '([^']*)(?:'|\$)
+ | (((?!$space|>).)*)
)
)?(?=$space|\$)/sx";
}
!! end
-# FIXME: Preserve the attribute properly (with an empty string as value) in
-# the PHP parser. Parsoid implements the behavior below.
!! test
Table attributes with empty value
!! wikitext
{|
| style=| hello
|}
+!! html/php
+<table>
+<tr>
+<td style=""> hello
+</td></tr></table>
+
!! html/parsoid
<table>
<tbody>
!!end
-# FIXME: produce empty string instead of "class" in the PHP parser, following
-# the HTML5 spec.
!! test
div with empty attribute value, space before equals
-!! options
-parsoid
!! wikitext
<div class =>HTML rocks</div>
!! html
!! end
+# FIXME: Parsoid's expected output for this test doesn't match the HTML5 spec
!! test
div with multiple empty attribute values
!! options
-parsoid
+parsoid=wt2html,html2html
!! wikitext
<div id= title=>HTML rocks</div>
-!! html
-<div id="" title="">HTML rocks</div>
+!! html/php
+<div id="title.3D">HTML rocks</div>
+!! html/parsoid
+<div id="" title="">HTML rocks</div>
!! end
+# FIXME: Parsoid's expected output for this test doesn't match the HTML5 spec
!! test
table with multiple empty attribute values
!! options
-parsoid
+parsoid=wt2html,html2html
!! wikitext
{| title= id=
| hi
|}
-!! html
+!! html/php
+<table title="id=">
+<tr>
+<td> hi
+</td></tr></table>
+
+!! html/parsoid
<table title="" id="">
<tbody><tr><td> hi</td></tr>
</tbody></table>
<div title="{}">Foo</div>
!! end
-# This it very inconsistent in the PHP parser: it returns
-# class="class" if there is a space between the name and the equal sign (see
-# 'div with empty attribute value, space before equals'), but strips the
-# attribute completely if the space is missing. We hope that not much content
-# depends on this, so are implementing the behavior below in Parsoid for
-# consistencies' sake.
-# FIXME: fix this behavior in the PHP parser?
!! test
div with empty attribute value, no space before equals
!! options
!! wikitext
<div class=>HTML rocks</div>
!! html/php
-<div>HTML rocks</div>
+<div class="">HTML rocks</div>
!! html/parsoid
<div class="">HTML rocks</div>
!! wikitext
<font color=>foo</font>
!! html
-<p><font>foo</font>
+<p><font color="">foo</font>
</p>
!! end
!! test
HTML tag with broken attribute value quoting
+!! options
+parsoid=wt2html,html2html
!! wikitext
<span title="Hello world>Foo</span>
!! html/php
<p><span title="Hello world">Foo</span>
</p>
!! html/parsoid
-<p><span title="Hello world">Foo</span>
-</p>
+<p><span title="Hello world">Foo</span></p>
!! end
!! test
!! test
Table with broken attribute value quoting
+!! options
+parsoid=wt2html,html2html
!! wikitext
{|
| title="Hello world|Foo
!! test
Table with broken attribute value quoting on consecutive lines
+!! options
+parsoid=wt2html,html2html
!! wikitext
{|
| title="Hello world|Foo